#######################################################################################################
# Autor: Jan Stuckatz + Jieun Lee
# Date: 2023/05/10
# Paper Title: Mobilization and Strategies: Comparing Trade Lobbying in the U.S. and Canada
# Replication of tables and figures in main text
######################################################################################################



# required libraries
library(lubridate)
library(knitr)
library(kableExtra)
library(ggplot2)  
library(lfe)
library(sandwich)
library(lmtest)
library(readr)
library(stargazer)
library(dplyr)

# set wd to where code is, if using RStudio
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))




#### Table 2 ####
usa <- readr::read_csv("../data/usa_clients.csv")
can <- readr::read_csv("../data/can_clients.csv")



# input panel data, for subsetting to 2016 to 2020, USMCA
# USA
us_2016_2020 <- readr::read_csv("../data/panel_usa.csv") %>%
  filter(year_q %in% c("2016-Q1", "2016-Q2", "2016-Q3", "2016-Q4",
                       "2017-Q1", "2017-Q2", "2017-Q3", "2017-Q4",
                       "2018-Q1", "2018-Q2", "2018-Q3", "2018-Q4",
                       "2019-Q1", "2019-Q2", "2019-Q3", "2019-Q4",
                       "2020-Q1", "2020-Q2"))

# CAN
ca_2016_2020 <- readr::read_csv("../data/panel_can.csv") %>%
  filter(year_q_ch %in% c("2016-Q1", "2016-Q2", "2016-Q3", "2016-Q4",
                          "2017-Q1", "2017-Q2", "2017-Q3", "2017-Q4",
                          "2018-Q1", "2018-Q2", "2018-Q3", "2018-Q4",
                          "2019-Q1", "2019-Q2", "2019-Q3", "2019-Q4",
                          "2020-Q1", "2020-Q2"))

# Subset by CA and US clients between 2016 and 2020
usa <- usa %>% filter(!is.na(bvdid_final)) %>%
  filter(bvdid_final %in% unique(us_2016_2020$bvdid_final[us_2016_2020$usmca==1]))
can <- can %>% filter(!is.na(bvdid_final)) %>%
  filter(bvdid_final %in% unique(ca_2016_2020$bvdid_final[ca_2016_2020$usmca==1]))


#### Companies Lobbyng in Both CAN and USA
can$both <- ifelse(can$bvdid_final %in% usa$bvdid_final, 1, 0)
usa$both <- ifelse(usa$bvdid_final %in% can$bvdid_final, 1, 0) 

# get unique firms lobbying on both sides, check they are the same.
us_both_unique <- sort(unique(usa$bvdid_final[usa$both==1]))
ca_both_unique <- sort(unique(can$bvdid_final[can$both==1]))

# create df with unique clients on both sides and some descriptives
clients_both <- can %>% filter(both == 1) %>%
  select(bvdid_final, org_type, orbis_name_final, GUO_Name, GUO_bvdid) %>%
  distinct()


# reduced df for table below
usa_count <- usa %>%
  select(bvdid_final, orbis_name_final, org_type) %>%
  distinct() %>% 
  mutate(ISO2 = substr(bvdid_final, 1, 2),
         tpp = ifelse(bvdid_final %in% unique(usa$bvdid_final[usa$tpp == 1]) , 1, 0),
         usmca = ifelse(bvdid_final %in% unique(usa$bvdid_final[usa$usmca == 1]), 1, 0))



# For Descriptive Purposes
tab1 <- data.frame(`Org_Type` = c("Corporation", "Ideological Group", 
                                  "Peak Association", "Public" , "Trade Association", 
                                  "Union", "Total"),
                   `all_USMCA` = rep(NA,7),
                   `perc_USMCA` = rep(NA,7),
                   `us_USMCA` = rep(NA,7),
                   `us_perc` = rep(NA,7),
                   `ca_USMCA` = rep(NA,7),
                   `ca_perc` = rep(NA,7),
                   `both_USMCA` = rep(NA,7),
                   `both_perc` = rep(NA,7), stringsAsFactors = FALSE)
# USA
tab1$`us_USMCA` <- c(table(usa_count$org_type[!is.na(usa_count$bvdid_final) & usa_count$usmca==1]), sum(table(usa_count$org_type[!is.na(usa_count$bvdid_final) & usa_count$usmca==1])))
tab1$us_perc[1:6] <- round(tab1$us_USMCA[1:6]/tab1$us_USMCA[7], 2)


# Canada
tab1$`ca_USMCA`[1] <- length(unique(can$bvdid_final[can$org_type=="Corporation" & can$usmca==1]))
tab1$`ca_USMCA`[2] <- length(unique(can$bvdid_final[can$org_type=="Ideological Group" & can$usmca==1]))
tab1$`ca_USMCA`[3] <- length(unique(can$bvdid_final[can$org_type=="Peak Association" & can$usmca==1]))
tab1$`ca_USMCA`[4] <- length(unique(can$bvdid_final[can$org_type=="Public" & can$usmca==1]))
tab1$`ca_USMCA`[5] <- length(unique(can$bvdid_final[can$org_type=="Trade Association" & can$usmca==1]))
tab1$`ca_USMCA`[6] <- length(unique(can$bvdid_final[can$org_type=="Union" & can$usmca==1]))
tab1$`ca_USMCA`[7] <- length(unique(can$bvdid_final[can$usmca==1]))

tab1$ca_perc[1:6] <- round(tab1$ca_USMCA[1:6]/tab1$ca_USMCA[7], 2)

# Both Countries
tab1$`all_USMCA`[1] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Corporation" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Corporation" & can$usmca==1])))
tab1$`all_USMCA`[2] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Ideological Group" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Ideological Group" & can$usmca==1])))
tab1$`all_USMCA`[3] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Peak Association" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Peak Association" & can$usmca==1])))
tab1$`all_USMCA`[4] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Public" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Public" & can$usmca==1])))
tab1$`all_USMCA`[5] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Trade Association" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Trade Association" & can$usmca==1])))
tab1$`all_USMCA`[6] <- length(unique(c(usa_count$bvdid_final[usa_count$org_type=="Union" & usa_count$usmca==1], can$bvdid_final[can$org_type=="Union" & can$usmca==1])))
tab1$`all_USMCA`[7] <- length(unique(c(usa_count$bvdid_final[usa_count$usmca==1], can$bvdid_final[can$usmca==1])))

tab1$perc_USMCA[1:6] <- round(tab1$all_USMCA[1:6]/tab1$all_USMCA[7], 2)


# Which clients in USA also lobby in Canada, which Canadian Clients also Lobby in the US?
can$lob_usa <- ifelse(can$bvdid_final %in% usa_count$bvdid_final, 1, 0)
can$lob_usa_tpp <- ifelse(can$bvdid_final %in% usa_count$bvdid_final[usa_count$tpp==1], 1, 0)
can$lob_usa_usmca <- ifelse(can$bvdid_final %in% usa_count$bvdid_final[usa_count$usmca==1], 1, 0)

usa_count$lob_can <- ifelse(usa_count$bvdid_final %in% can$bvdid_final, 1, 0)
usa_count$lob_can_tpp <- ifelse(usa_count$bvdid_final %in% can$bvdid_final[can$tpp==1], 1, 0)
usa_count$lob_can_usmca <- ifelse(usa_count$bvdid_final %in% can$bvdid_final[can$usmca==1], 1, 0)


# Table for both
tab1$both_USMCA[1] <- sum(unique(usa_count$bvdid_final[usa_count$firm==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$firm==1 & can$usmca==1]))
tab1$both_USMCA[2] <- sum(unique(usa_count$bvdid_final[usa_count$ideol_group==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$ideol_group==1 & can$usmca==1]))
tab1$both_USMCA[3] <- sum(unique(usa_count$bvdid_final[usa_count$peak_assoc==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$peak_assoc==1 & can$usmca==1]))
tab1$both_USMCA[4] <- sum(unique(usa_count$bvdid_final[usa_count$public==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$public==1 & can$usmca==1]))
tab1$both_USMCA[5] <- sum(unique(usa_count$bvdid_final[usa_count$trade_assoc==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$trade_assoc==1 & can$usmca==1]))
tab1$both_USMCA[6] <- sum(unique(usa_count$bvdid_final[usa_count$union==1 & usa_count$usmca==1]) %in% unique(can$bvdid_final[can$union==1 & can$usmca==1]))
tab1$both_USMCA[7] <- sum(unique(usa_count$bvdid_final[usa_count$usmca==1]) %in% unique(can$bvdid_final[can$usmca==1]))

tab1$both_perc[1:6] <- round(tab1$both_USMCA[1:6]/tab1$both_USMCA[7], 2)


# Print Table to LaTeX File
library(knitr)
library(kableExtra)

options(knitr.kable.NA = '')
kableExtra::kable(tab1, "latex", booktabs = T, digits = 2, linesep = "\\addlinespace",
                  col.names = c("Organisation Type", rep(c("Clients", "Share"), 4)), 
                  caption = "Types of Organisations Lobbying USMCA in the U.S. and Canada, 2016-2020", label = "tab1") %>%
  footnote(general = "The table shows the types of organizations lobbying on the USMCA in the U.S. in Canada between the first quarter of 2016 and the second quarter of 2020.", threeparttable = T) %>%
  kableExtra::kable_styling(latex_options = c( "hold_position")) %>%
  add_header_above(c("", "Overall" = 2, "in United States" = 2, "in Canada" = 2, "in Both" = 2), bold = TRUE) %>%
  add_header_above(c("", "Unique Clients Lobbying the USMCA:" = 8), bold = TRUE) %>%
  column_spec(1, width = "9em") %>%
  save_kable(file = "../output/table_2.tex")




#### Figure 1 ####
ca_naics_plot <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/ca_naics_count.csv")
us_naics_plot <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/us_naics_count.csv")


# USA
naics_hist_us <- ggplot(us_naics_plot, aes(fill=type, y=value, x=reorder(naics3, -value))) + 
  geom_bar(position="stack", stat="identity") + 
  ylab("Unique 6-Digit NAICS Codes") +
  xlab("3-Digit NAICS Code") +
  scale_fill_manual(values = c("#999999", "#56B4E9", "#0072B2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle=45, vjust=3.75, hjust=2, size = 7, colour = "black"),
        axis.text.y = element_text(colour = "black", size = 15),
        legend.position = c(0.5, 0.9),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(colour = "lightgrey"),
        axis.ticks.y = element_line(colour = "black",),
        legend.text = element_text(colour = "black", size = 16),
        axis.title.x = element_blank(),
        axis.title.y = element_text(colour = "black", face = "bold", size = 17)) 

ggsave(naics_hist_us, filename = "../output/fig_1us.pdf", scale = 1.75, width = 7.5, height = 6)
sum(us_naics_plot$value) # 589 industries mobilizing in the US


# Canada
naics_hist_ca <- ggplot(ca_naics_plot, aes(fill=type, y=value, x=reorder(naics3, -value))) + 
  geom_bar(position="stack", stat="identity") + 
  ylab("Unique 6-Digit NAICS Codes") +
  xlab("3-Digit NAICS Code") +
  ylim(0, 40) +
  scale_fill_manual(values = c("#999999", "#56B4E9", "#0072B2")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle=45, vjust=3.5, hjust=2, size = 9, colour = "black"),
        axis.text.y = element_text(colour = "black", size = 15),
        #legend.position = c(0.5, 0.9),
        legend.position = "none",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(colour = "lightgrey"),
        axis.ticks.y = element_line(colour = "black",),
        #legend.text = element_text(colour = "black", size = 16),
        axis.title.x = element_text(colour = "black", face = "bold", size = 17),
        axis.title.y = element_text(colour = "black", face = "bold", size = 17)) 

ggsave(naics_hist_ca, filename = "../output/fig_1ca.pdf", scale = 1.75, width = 7.5, height = 6)
sum(ca_naics_plot$value) # 216 industries mobilizing in CA




#### Table 3 #####
library(lfe)
library(sandwich)
library(lmtest)

# read data
Mobil <- readr::read_csv("../data/mobilization_ca_us_20211022.csv")


### USA: Lobby with Association
### USA: Assoc Lobby
lm1 <- felm(I(us_usmca_assoc == 1) ~ factor(diff_osg_imp) + log(ind_sale) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm2 <- felm(I(us_usmca_assoc == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm3 <- felm(I(us_usmca_assoc == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | naics2d | 0 | 0, data = Mobil) # Lobby Alone
### USA: Firm Lobby
lm4 <- felm(I(us_usmca_corp == 1) ~ factor(diff_osg_imp) + log(ind_sale) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm5 <- felm(I(us_usmca_corp == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm6 <- felm(I(us_usmca_corp == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | naics2d | 0 | 0, data = Mobil) # Lobby Alone

# robust SEs for table
cov1    <- vcovHC(lm1, type = "HC1") 
robust_se1   <- sqrt(diag(cov1))
cov2    <- vcovHC(lm2, type = "HC1") 
robust_se2   <- sqrt(diag(cov2))
cov3    <- vcovHC(lm3, type = "HC1") 
robust_se3   <- sqrt(diag(cov3))
cov4    <- vcovHC(lm4, type = "HC1") 
robust_se4   <- sqrt(diag(cov4))
cov5    <- vcovHC(lm5, type = "HC1") 
robust_se5   <- sqrt(diag(cov5))
cov6    <- vcovHC(lm6, type = "HC1") 
robust_se6   <- sqrt(diag(cov6))


stargazer::stargazer(lm1, lm2, lm3, lm4, lm5, lm6, type = "latex", title = "Mobilization during USMCA in the United States", label = "mob_us2",
                     out = "../output/table_3.tex",
                     covariate.labels = c("4-Firm Concentr. Ratio", "Mod.Differentiated","Differentiated", "Log(Sales)"), 
                     dep.var.labels = c("Association Mobilization (1/0)", "Firm Mobilization (1/0)"),
                     se = list(robust_se1, robust_se2, robust_se3,
                               robust_se4, robust_se5, robust_se6),
                     add.lines = list(c("2-digit NAICS FEs",  "", "","", "$\\checkmark$", "", "","", "$\\checkmark$")),
                     omit.stat = c("rsq", "ser"),
                     #omit = c("same_state", "cba", "majority", "subcom_chair", "com_chair", "power_com", "majority_leader",  "minority_leader"),
                     column.sep.width = "3pt", notes = "All estimates are OLS. Robust standard errors in parentheses.", notes.align = "l")



#### Table 4 ####

### CAN: Assoc Lobby
lm1 <- felm(I(ca_usmca_assoc == 1) ~ factor(diff_osg_imp) + log(ind_sale) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm2 <- felm(I(ca_usmca_assoc == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm3 <- felm(I(ca_usmca_assoc == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | naics2d | 0 | 0, data = Mobil) # Lobby Alone
### CAN: Firm Lobby
lm4 <- felm(I(ca_usmca_corp == 1) ~ factor(diff_osg_imp) + log(ind_sale) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm5 <- felm(I(ca_usmca_corp == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | 0 | 0 | 0, data = Mobil) # Lobby with Assoc
lm6 <- felm(I(ca_usmca_corp == 1) ~ CR4 + log(ind_sale) + factor(diff_osg_imp) | naics2d | 0 | 0, data = Mobil) # Lobby Alone

# robust SEs for table
cov1    <- vcovHC(lm1, type = "HC1") 
robust_se1   <- sqrt(diag(cov1))
cov2    <- vcovHC(lm2, type = "HC1") 
robust_se2   <- sqrt(diag(cov2))
cov3    <- vcovHC(lm3, type = "HC1") 
robust_se3   <- sqrt(diag(cov3))
cov4    <- vcovHC(lm4, type = "HC1") 
robust_se4   <- sqrt(diag(cov4))
cov5    <- vcovHC(lm5, type = "HC1") 
robust_se5   <- sqrt(diag(cov5))
cov6    <- vcovHC(lm6, type = "HC1") 
robust_se6   <- sqrt(diag(cov6))


stargazer::stargazer(lm1, lm2, lm3, lm4, lm5, lm6, type = "latex", title = "Mobilization during USMCA in Canada", label = "mob_ca2",
                     out = "../output/table_4.tex",
                     covariate.labels = c("4-Firm Concentr. Ratio", "Mod.Differentiated","Differentiated", "Log(Sales)"), 
                     dep.var.labels = c("Association Mobilization (1/0)", "Firm Mobilization (1/0)"),
                     se = list(robust_se1, robust_se2, robust_se3,
                               robust_se4, robust_se5, robust_se6),
                     add.lines = list(c("2-digit NAICS FEs",  "", "","", "$\\checkmark$", "", "","", "$\\checkmark$")),
                     omit.stat = c("rsq", "ser"),
                     #omit = c("same_state", "cba", "majority", "subcom_chair", "com_chair", "power_com", "majority_leader",  "minority_leader"),
                     column.sep.width = "3pt", notes = "All estimates are OLS. Robust standard errors in parentheses.", notes.align = "l")


rm(lm1, lm2, lm3, lm4, lm5, lm6,
   cov1, cov2, cov3, cov4, cov5, cov6,
   robust_se1, robust_se2, robust_se3, robust_se4, robust_se5, robust_se6)




#### Figure 2 ####
# CAN read lobbying of institutions data
ca_clients_inst <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/can_clients_inst.csv")

# USA, read lobbying of institutions data
us_comms <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/usa_clients_comms.csv")

# aggregate at the client level, subet to 2016 to 2020
parl_us <- us_comms %>%
  # filter(report_year > = 2012) %>%
  filter(!is.na(bvdid_final), usmca == 1) %>%
  group_by(bvdid_final, orbis_name_final) %>%
  summarise(pres = sum(c(whouse+execpr), na.rm = TRUE),
            house = sum(house, na.rm = TRUE),
            senate = sum(senate, na.rm = TRUE),
            ustr = sum(ustr, na.rm = TRUE),
            doc = sum(doc, na.rm = TRUE ),
            dag = sum(dag, na.rm = TRUE),
            inst = sum(n_gov_entities, na.rm = TRUE),
            b_pres = ifelse(sum(c(whouse+execpr), na.rm = TRUE) > 0, 1, 0),
            b_house = ifelse(sum(house, na.rm = TRUE) > 0, 1, 0),
            b_senate = ifelse(sum(senate, na.rm = TRUE) > 0, 1, 0),
            b_ustr = ifelse(sum(ustr, na.rm = TRUE) > 0, 1, 0),
            b_doc = ifelse(sum(doc, na.rm = TRUE ) > 0, 1, 0),
            b_dag = ifelse(sum(dag, na.rm = TRUE) > 0, 1, 0),
            usmca = ifelse(sum(usmca, na.rm = TRUE) > 0, 1, 0),
            s_pres = sum(c(whouse+execpr), na.rm = TRUE)/sum(n_gov_entities, na.rm = TRUE),
            s_house = sum(house, na.rm = TRUE)/sum(n_gov_entities, na.rm = TRUE),
            s_senate = sum(senate, na.rm = TRUE)/sum(n_gov_entities, na.rm = TRUE),
            s_ustr = sum(ustr, na.rm = TRUE)/sum(n_gov_entities, na.rm = TRUE),
            s_doc = sum(doc, na.rm = TRUE )/sum(n_gov_entities, na.rm = TRUE),
            s_dag = sum(dag, na.rm = TRUE)/sum(n_gov_entities, na.rm = TRUE)) %>%
  filter(!is.na(bvdid_final)) %>%
  filter(bvdid_final %in% unique(us_2016_2020$bvdid_final[us_2016_2020$usmca==1]))

# CAN, USMCA clients, 2016-2020
ca_usmca <- ca_clients_inst %>% 
  filter(bvdid_final %in% unique(ca_2016_2020$bvdid_final[ca_2016_2020$usmca==1]))
# USA, USMCA clients, 2016-2020
us_usmca <- parl_us %>% filter(usmca == 1)


# Canada
fig2ca <- ca_usmca %>%
  mutate(country = "CA") %>%
  group_by(bvdid_final, orbis_name_final, country) %>%
  summarise(pm = ifelse(b_pm == 1, 1, 0),
            exe = ifelse(gaf > 0 | dfaitc > 0 | foraff > 0 | aafc > 0 | ised > 0 | fin > 0, 1, 0),
            house = ifelse(house > 0, 1, 0),
            senate = ifelse(senate > 0, 1, 0)) %>%
  ungroup() %>%
  summarise(pres = sum(pm)/n(),
            exe = sum(exe)/n(),
            house = sum(house)/n(),
            senate = sum(senate)/n())
# USA
fig2us <- us_usmca %>%
  group_by(bvdid_final, orbis_name_final) %>%
  summarise(pres = ifelse(b_pres == 1, 1, 0),
            exe = ifelse(ustr > 0 | doc > 0 | dag > 0, 1, 0),
            house = ifelse(house > 0, 1, 0),
            senate = ifelse(senate > 0, 1, 0)) %>%
  ungroup() %>%
  summarise(pres = sum(pres)/n(),
            exe = sum(exe)/n(),
            house = sum(house)/n(),
            senate = sum(senate)/n())


# DF for plotting
df <- data.frame(Country = rep(c("United States (n=716)", "Canada (n=138)"), each = 4),
                 Institution = rep(c("President\nor\nPrime Minister", "Bureaucracy", "House", "Senate"),  2),
                 Lobby = round(unlist(c(fig2us, fig2ca)), 2)) %>%
  mutate(Country = factor(Country, levels = c("United States (n=716)", "Canada (n=138)")),
         Institution = factor(Institution, levels = c("House", "Senate", "President\nor\nPrime Minister", "Bureaucracy")))

library(ggplot2)  
ggplot(df, aes(x = Institution, y = Lobby, fill = Country) ) +
  geom_col(position = position_dodge(0.8), width = 0.8) +
  scale_fill_grey() +
  theme_minimal() +
  xlab("\nInstitution Lobbied") +
  ylab("Share of Clients\n") +
  theme(axis.text = element_text(face = "plain", colour = "black", size = 16)) +
  theme(axis.title = element_text(face = "bold", colour = "black", size = 19)) +
  theme(legend.position="top") +
  theme(legend.title = element_blank()) +
  theme(legend.text =  element_text(face = "plain", colour = "black", size = 17)) 

ggsave(filename = "../output/fig_2.pdf", width = 9, height = 6.5)
dev.off()




#### Figure 3 ####
# US + Canadian lobbyisys-clients file
ca_lobbyists_clients <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/can_clients_lobbyists.csv")
us_lobbyists_clients <- readr::read_csv("C:/Users/js.egb/Dropbox/Uni_Projects/us_can_nafta_lobby/submission/CPS/replication_material/data/usa_clients_lobbyists.csv")

# subset to USMCA
us_clients <- us_lobbyists_clients %>% filter(usmca == 1)
ca_clients <- ca_lobbyists_clients %>% filter(usmca == 1)

# Density Plots for usage of Lobbyists
us_clients <- us_clients[,c(names(ca_clients))] # keep same variables
us_clients$ID <- 1:dim(us_clients)[1] # indicator for country and new ID
ca_clients$ID <- c((dim(us_clients)[1]+1):(dim(us_clients)[1]+dim(ca_clients)[1])) # indicator for country and new ID
us_clients$country <- "United States" # indicator for US/CA
ca_clients$country <- "Canada"
clients <- dplyr::bind_rows(us_clients, ca_clients) %>% # rbind data
  mutate(n_lob = n_xtlob + n_ihlob)


# All Lobbyists
fig3a <- ggplot(clients, aes(x=n_lob, fill=country)) +
  geom_density(alpha = 0.4, adjust = 0.7, size = 0, color = NA) +
  theme_bw() +
  scale_x_log10(breaks=c(0, 1, 10,100, 1000), limits = c(0.5,300)) +
  scale_y_continuous(breaks=c(seq(0,1.25, 0.25)), limits = c(0,1.25)) +
  scale_fill_manual("Country", 
                    values = c(gray.colors(3, 0.2, 0.6)[3],gray.colors(3, 0.2, 0.6)[1]),
                    labels = c("Canada", "United States")) +
  geom_vline(xintercept = mean(clients$n_lob[clients$country=="Canada"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[3], size = 1) +
  geom_vline(xintercept = mean(clients$n_lob[clients$country=="United States"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[1], size = 1) +
  annotate("text", x=mean(clients$n_lob[clients$country=="Canada"])-2.25, y=1, label=paste0(round(mean(clients$n_lob[clients$country=="Canada"]), 1)), size=6, angle = 0, color = gray.colors(3, 0.2, 0.6)[2]) +
  annotate("text", x=mean(clients$n_lob[clients$country=="United States"])+5.1, y=1, label=paste0(round(mean(clients$n_lob[clients$country=="United States"]), 1)), size=6, angle = 0, color = "black") +
  xlab('Log(10)(# Lobbyists)') +
  ylab("Density\n") +
  ggtitle("All Lobbyists") +
  theme(legend.title = element_blank(), 
        legend.text = element_text(color = "black", size = 16),
        axis.text = element_text(color = "black", size = 16),
        axis.title = element_text(color = "black", face = "bold", size = 19),
        plot.title = element_text(color = "black", face = "bold", size = 21, hjust = 0.5),
        legend.position = "top",
        panel.border = element_blank(),
        axis.ticks.y=element_blank()) 


# In House Lobbyists
fig3b <- ggplot(clients, aes(x=n_ihlob, fill=country)) +
  geom_density(alpha = 0.4, adjust = 0.7, size = 0, color = NA) +
  theme_bw() +
  scale_x_log10(breaks=c(0, 1, 10,100, 1000), limits = c(0.5,300)) +
  scale_y_continuous(breaks=c(seq(0,1.25, 0.25)), limits = c(0,1.25)) +
  scale_fill_manual("Country", 
                    values = c(gray.colors(3, 0.2, 0.6)[3],gray.colors(3, 0.2, 0.6)[1]),
                    labels = c("Canada", "United States")) +
  geom_vline(xintercept = mean(clients$n_ihlob[clients$country=="Canada"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[3], size = 1) +
  geom_vline(xintercept = mean(clients$n_ihlob[clients$country=="United States"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[1], size = 1) +
  annotate("text", x=mean(clients$n_ihlob[clients$country=="Canada"], na.rm = TRUE)+2.1, y=1, label=paste0(round(mean(clients$n_ihlob[clients$country=="Canada"], na.rm = TRUE), 1)), size=6, angle = 0, color = gray.colors(3, 0.2, 0.6)[2]) +
  annotate("text", x=mean(clients$n_ihlob[clients$country=="United States"],na.rm = TRUE)-1.6, y=1, label=paste0(round(mean(clients$n_ihlob[clients$country=="United States"], na.rm = TRUE), 1)), size=6, angle = 0, color = "black") +
  xlab('Log(10)(# Lobbyists)') +
  ylab("Density\n") +
  ggtitle("In-House Lobbyists") +
  theme(legend.title = element_blank(), 
        legend.text = element_text(color = "black", size = 16),
        axis.text = element_text(color = "black", size = 16),
        axis.title = element_text(color = "black", face = "bold", size = 19),
        axis.title.y = element_blank(),
        plot.title = element_text(color = "black", face = "bold", size = 21, hjust = 0.5),
        legend.position = "top",
        panel.border = element_blank(),
        axis.ticks.y=element_blank()) 


fig3c <- ggplot(clients, aes(x=n_xtlob, fill=country)) +
  geom_density(alpha = 0.4, adjust = 0.7, size = 0, color = NA) +
  theme_bw() +
  scale_x_log10(breaks=c(0, 1, 10,100, 1000), limits = c(0.5,300)) +
  scale_y_continuous(breaks=c(seq(0,3, 0.5)), limits = c(0,3)) +
  scale_fill_manual("Country", 
                    values = c(gray.colors(3, 0.2, 0.6)[3],gray.colors(3, 0.2, 0.6)[1]),
                    labels = c("Canada", "United States")) +
  geom_vline(xintercept = mean(clients$n_xtlob[clients$country=="Canada"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[3], size = 1) +
  geom_vline(xintercept = mean(clients$n_xtlob[clients$country=="United States"]), linetype = "dashed", color = gray.colors(3, 0.2, 0.6)[1], size = 1) +
  annotate("text", x=mean(clients$n_xtlob[clients$country=="Canada"], na.rm = TRUE)-0.5, y=2.25, label=paste0(round(mean(clients$n_xtlob[clients$country=="Canada"], na.rm = TRUE), 1)), size=6, angle = 0, color = gray.colors(3, 0.2, 0.6)[2]) +
  annotate("text", x=mean(clients$n_xtlob[clients$country=="United States"], na.rm = TRUE)+1.3, y=2.25, label=paste0(round(mean(clients$n_xtlob[clients$country=="United States"], na.rm = TRUE), 1)), size=6, angle = 0, color = "black") +
  xlab('Log(10)(# Lobbyists)') +
  ylab("Density\n") +
  ggtitle("External Lobbyists") +
  theme(legend.title = element_blank(), 
        legend.text = element_text(color = "black", size = 16),
        axis.text = element_text(color = "black", size = 16),
        axis.title = element_text(color = "black", face = "bold", size = 19),
        axis.title.y = element_blank(),
        plot.title = element_text(color = "black", face = "bold", size = 21, hjust = 0.5),
        legend.position = "top",
        panel.border = element_blank(),
        axis.ticks.y=element_blank()) 


ggsave(fig3a, device = "pdf", dpi = 2400, filename = "../output/fig_3a.pdf", width = 6.5, height = 5)
ggsave(fig3b, device = "pdf", dpi = 2400, filename = "../output/fig_3b.pdf", width = 6.5, height = 5)
ggsave(fig3c, device = "pdf", dpi = 2400, filename = "../output/fig_3c.pdf", width = 6.5, height = 5)




#### Table 5 ####
library(lfe)
library(sandwich)
library(lmtest)
ca_lobs <- readr::read_csv("../data/ca_lobbyists.csv")
us_lobs <- readr::read_csv("../data/us_lobbyists.csv")


# Common Table w. USA/CAN for comparison. Leave out Peak Associations
us_lobs$us_xtlob_sh <- us_lobs$n_xtlob/us_lobs$n_lob
lm1 <- felm(us_xtlob_sh ~ log(restrictions)   | report_year | 0 | 0, data = us_lobs[us_lobs$org_type != "Peak Association",]) # 
lm2 <- felm(us_xtlob_sh ~ log(restrictions) + log(n_issues)  | report_year| 0 | 0, data = us_lobs[us_lobs$org_type != "Peak Association",]) # 
lm3 <- felm(us_xtlob_sh ~ log(restrictions) + log(n_issues) + log(ind_sale)   | report_year  | 0 | 0, data = us_lobs[us_lobs$org_type != "Peak Association",]) # 
lm4 <- felm(us_xtlob_sh ~ log(restrictions) + log(n_issues) + log(ind_sale)  | report_year + naics2 | 0 | 0, data = us_lobs[us_lobs$org_type != "Peak Association",]) # 

ca_lobs$ca_xtlob_sh <- ca_lobs$n_xtlob/ca_lobs$n_lob
lm5 <- felm(ca_xtlob_sh ~ log(restrictions)   | year | 0 | 0, data = ca_lobs[ca_lobs$org_type != "Peak Association",]) # 
lm6 <- felm(ca_xtlob_sh ~ log(restrictions) + log(n_issues)  | year| 0 | 0, data = ca_lobs[ca_lobs$org_type != "Peak Association",]) # 
lm7 <- felm(ca_xtlob_sh ~ log(restrictions) + log(n_issues) + log(ind_sale)   | year  | 0 | 0, data = ca_lobs[ca_lobs$org_type != "Peak Association",]) # 
lm8 <- felm(ca_xtlob_sh ~ log(restrictions) + log(n_issues) + log(ind_sale)  | year + naics2 | 0 | 0, data = ca_lobs[ca_lobs$org_type != "Peak Association",]) # 

cov1    <- vcovHC(lm1, type = "HC1") 
robust_se1   <- sqrt(diag(cov1))
cov2    <- vcovHC(lm2, type = "HC1") 
robust_se2   <- sqrt(diag(cov2))
cov3    <- vcovHC(lm3, type = "HC1") 
robust_se3   <- sqrt(diag(cov3))
cov4    <- vcovHC(lm4, type = "HC1") 
robust_se4   <- sqrt(diag(cov4))
cov5    <- vcovHC(lm5, type = "HC1") 
robust_se5   <- sqrt(diag(cov5))
cov6    <- vcovHC(lm6, type = "HC1") 
robust_se6   <- sqrt(diag(cov6))
cov7    <- vcovHC(lm7, type = "HC1") 
robust_se7   <- sqrt(diag(cov7))
cov8    <- vcovHC(lm8, type = "HC1") 
robust_se8   <- sqrt(diag(cov8))

stargazer::stargazer(lm1, lm2, lm3, lm4, lm5, lm6, lm7, lm8, type = "latex", title = "External Lobbyist Usage during USMCA Lobbying in the United States and Canada", label = "tab_5",
                     out = "../output/table_5.tex",
                     covariate.labels = c("Log(restrictions)", "Log(Issues Lobbied)", "Log(Sales)"), 
                     dep.var.labels = c("United States, Ext. Lobbyist Share", "Canada, Ext. Lobbyist Share"),
                     se = list(robust_se1, robust_se2, robust_se3, robust_se4,
                               robust_se5, robust_se6, robust_se7, robust_se8),
                     add.lines = list(c("Year FEs",  rep("$\\checkmark$", 8)),
                                      c("2-digit NAICS FEs",  "", "","", "$\\checkmark$", "", "","", "$\\checkmark$")),
                     omit.stat = c("rsq", "ser"),
                     #omit = c("same_state", "cba", "majority", "subcom_chair", "com_chair", "power_com", "majority_leader",  "minority_leader"),
                     column.sep.width = "3pt", notes = "Robust Standard Errors. All Estimates are OLS.", notes.align = "l")





#### clean up ####
rm(list = ls())






